##
simulate_A_array<-F
sim_no_friends<-F
sim_random<-F
sim_reassign_links<-F
sim_links_same_zeroes<-T
sim_assortative<-F
compute_equilibrium_effects<-F

## summary stats: conditional means of study time by race, high school gpa high, hs combact high
## given parameters and hs data, simulates model
  solution<-calculate_log_likelihood(parameters_readin$estimate[parameters_readin$fixed==F])
  parameters_new<-relist(parameters_readin$estimate, parameters)

### counterfactual A_arrays
if (simulate_A_array==TRUE){
  
  ## initialize so everyone has no friends
  A_array_months[,,1:n_months]<-0 

  if(sim_no_friends==F){
    for (month in 1:n_months)
    {
      ## very rough simulation
      if(sim_random==T){
        ## number of friends for each person, in time period
        set.seed(8643 + month)
        sim_num_friends<-rbinom(n_students,n_students,1.10/n_students)
        ## ids of friends for each person, in time period
        sim_friend_ids<-sapply(sim_num_friends, function(x) sample(n_students,x))
        
        ## fill in A_array_months with 1's for ids with sapply ==1 
        for (i in 1:n_students)
        {
          A_array_months[i,sim_friend_ids[[i]],month]<-1
        }
      A_array_months[,,month]<-pmax(A_array_months[,,month],t(A_array_months[,,month]))
      }
      if(sim_reassign_links==T){
      ## gather existing links in top triangle, reassign them  
        set.seed(8643 + month)
        
        friends_list<-vector("list", n_students)
        for (i in 1:(n_students-1))
        {
          friends_list[[i]]<-which(A_array_months_data[i,(i+1):n_students,month]==1)+i
        }
        
        friends_vec<-unlist(friends_list)
        n_friends<-rowSums(A_array_months_data[,,month])
        
        for (i in n_students:1)
        {
          if (length(friends_list[[i]])>0 & length(friends_vec[friends_vec>i])>=length(friends_list[[i]])) {
            this_guys_sim_friends<-sample(friends_vec[friends_vec>i], length(friends_list[[i]]),replace=F)
            A_array_months[i,this_guys_sim_friends,month]<-1
            ## remove these from friends_vec
            friends_vec<-friends_vec[-match(this_guys_sim_friends,friends_vec)]
          }
        }
        ## symmetrize
        A_array_months[,,month]<-pmax(A_array_months[,,month],t(A_array_months[,,month]))
      }
      if(sim_links_same_zeroes==T){
        ## simulate a bunch of links, replace ones with 0 links in data with 0
        set.seed(8643 + month)
        dist_vec<-rowSums(A_array_months_data[,,month])
        dist_vec<-dist_vec[dist_vec>0]
        dist_vec<-round(dist_vec/2)
        sim_num_friends<-sample(dist_vec, n_students, replace=T)
        ## ids of friends for each person, in time period
        sim_friend_ids<-sapply(sim_num_friends, function(x) sample(n_students,x))
        
        ## fill in A_array_months with 1's for ids with sapply ==1 
        for (i in 1:n_students)
        {
          A_array_months[i,sim_friend_ids[[i]],month]<-1
        }
        ## remove friends for students with 0 friends in data
        A_array_months[rowSums(A_array_months_data[,,month])==0,,month]<-0
        A_array_months[,colSums(A_array_months_data[,,month])==0,month]<-0
        
        ## symmetrize
        A_array_months[,,month]<-pmax(A_array_months[,,month],t(A_array_months[,,month]))
      }
      
      if (sim_assortative==T){
        ## match up students based on assortative matching
        mu_study_vec<-solution$mu_study_model
        ordered_mu_study_vec<-mu_study_vec[order(mu_study_vec)]
        ## number of friends for each person, in time period
        set.seed(8643 + month)
        sim_num_friends<-rbinom(n_students,n_students,1.125/n_students)
        ## how large a subset of total students to draw from?
        caliper_size<- 2 * (max(mu_study_vec) - min(mu_study_vec))/n_students
        n_sim_friends_to_draw_from_vec<-vector(length=0)
        for (i in 1:n_students)
        {
          if(sim_num_friends[i]>0){
            ## candidate friends to sample from
            lb_tmp<-mu_study_vec[i] - caliper_size
            ub_tmp<-mu_study_vec[i] + caliper_size
            sim_friends_to_draw_from_tmp<-which(mu_study_vec>=lb_tmp & mu_study_vec<=ub_tmp)
            n_sim_friends_to_draw_from_vec<-c(n_sim_friends_to_draw_from_vec, length(sim_friends_to_draw_from_tmp))
            sim_friend_ids<-sample(sim_friends_to_draw_from_tmp,sim_num_friends[i],replace=T)
            A_array_months[i,sim_friend_ids,month]<-1
          }
        }
        A_array_months[,,month]<-pmax(A_array_months[,,month],t(A_array_months[,,month]))
      }
    }  
  }
  
} else {
  ## reset A_array to data one 
  A_array_months<-A_array_months_data
}

## recalculate solution given simulated adjacency matrix
  solution<-calculate_log_likelihood(parameters_readin$estimate[parameters_readin$fixed==F])
  parameters_new<-relist(parameters_readin$estimate, parameters)

######### model fit
# fill in s obs first
# then fill in s_not_i for each survey period
  for (survey in 1:n_study_surveys)
    {
		alpha<-parameters_readin[parameters_readin$par_name=="alpha",]$estimate
    s_obs_array[,survey]<-student_data[,paste0("s", survey)]
    s_not_i_obs_array[,survey]<-calc_s_not_own_data(A_array_months[,,month_corresponding_to_survey[survey]])(s_obs_array[,survey])
    s_model_array[,survey]  <- solution$s_array_months_model[,month_corresponding_to_survey[survey]]
    s_not_i_model_array[,survey]<-calc_s_not_own_data(A_array_months[,,month_corresponding_to_survey[survey]])(s_model_array[,survey])
		
		## ignore students with 0 friends
		if (ignore_0_friend_students==T){
		  s_obs_array[rowSums(A_array_months[,,month_corresponding_to_survey[survey]])==0,survey]<-NA     
		  s_not_i_obs_array[rowSums(A_array_months[,,month_corresponding_to_survey[survey]])==0,survey]<-NA     
		  s_model_array[rowSums(A_array_months[,,month_corresponding_to_survey[survey]])==0,survey]<-NA     
		  s_not_i_model_array[rowSums(A_array_months[,,month_corresponding_to_survey[survey]])==0,survey]<-NA     
      
      if (ignore_ever_0_friend_students==T){
        s_obs_array[rowSums(A_array_months_data[,,1])==0 | rowSums(A_array_months_data[,,2])==0,]<-NA
        s_not_i_obs_array[rowSums(A_array_months_data[,,1])==0 | rowSums(A_array_months_data[,,2])==0,]<-NA
        s_model_array[rowSums(A_array_months_data[,,1])==0 | rowSums(A_array_months_data[,,2])==0,]<-NA
        s_not_i_model_array[rowSums(A_array_months_data[,,1])==0 | rowSums(A_array_months_data[,,2])==0,]<-NA
      }
      
		}
		  
		rm(alpha)
    }
  
  s_obs_avg<-apply(s_obs_array,1,mean, na.rm=T)
  s_not_i_obs_avg<-apply(s_not_i_obs_array,1,mean, na.rm=T)
  
#### build dataset with student, characteristics, avg study time (real, sim), avg gpa (real, sim)
  y_model_array<-solution$y_model_array
  y_growth_array<-solution$y_growth_array
  
  ## observed y
  # check that gpa1 and gpa2 are the right columns
    stopifnot(names(student_data)[c(58,59)]==c("gpa1", "gpa2"))
  
  y_obs_array<-array(NA, dim=c(n_students, 2))
  y_obs_array[,1]<-student_data[,"gpa1"]
  y_obs_array[,2]<-student_data[,"gpa2"]

  ## ignore students with 0 friends
  for (semester in 1:2)
  {
    if (ignore_0_friend_students==T){
    y_obs_array[rowSums(A_array_months[,,semester])==0,semester]<-NA     
    y_model_array[rowSums(A_array_months[,,semester])==0,semester]<-NA     
    y_growth_array[rowSums(A_array_months[,,semester])==0,semester]<-NA     
    
    if (ignore_ever_0_friend_students==T){
      y_obs_array[rowSums(A_array_months_data[,,1])==0 | rowSums(A_array_months_data[,,2])==0,]<-NA
      y_model_array[rowSums(A_array_months_data[,,1])==0 | rowSums(A_array_months_data[,,2])==0,]<-NA
      y_growth_array[rowSums(A_array_months_data[,,1])==0 | rowSums(A_array_months_data[,,2])==0,]<-NA
      }
    }
  }

  ## for comparability, replace model and sim arrays with NA for those missing obs
  s_model_array[is.na(s_obs_array)]<-NA
  s_not_i_model_array[is.na(s_not_i_obs_array)]<-NA
  y_model_array[is.na(y_obs_array)]<-NA
  y_growth_array[is.na(y_obs_array)]<-NA

  ## shocks
  # study time
    set.seed(5332)

      eta_sim_vec<-rnorm(n_students*n_study_surveys, mean=0, sd=relist(parameters_readin$estimate, parameters)$sigma_eta) 
      eta_sim_array<-matrix(eta_sim_vec, ncol=n_study_surveys, byrow=F)
  
  # gpa
    set.seed(3453)
    eps_sim_vec<-rnorm(n_students*2, mean=0, sd=relist(parameters_readin$estimate, parameters)$sigma_eps) 
    eps_sim_array<-matrix(eps_sim_vec, ncol=2, byrow=F)
  
  # simulated averages for each student, study time and model
  # based on shocks
    s_sim_array<-s_model_array + eta_sim_array
    y_sim_array<-y_model_array + eps_sim_array
    
    ## make sure simulated measures stay in the bounds
    s_sim_array <-pmax(s_sim_array,0)
    y_sim_array <-pmax(y_sim_array,0)
    y_sim_array <-pmin(y_sim_array,4)
    
  for (survey in 1:n_study_surveys)
  {
		alpha<-parameters_readin[parameters_readin$par_name=="alpha",]$estimate
    s_not_i_sim_array[,survey]<-calc_s_not_own_data(A_array_months[,,month_corresponding_to_survey[survey]])(s_sim_array[,survey])
		## ignore students with 0 friends
		if (ignore_0_friend_students==T){
		  s_not_i_sim_array[rowSums(A_array_months[,,month_corresponding_to_survey[survey]])==0,survey]<-NA     
		  
      if (ignore_ever_0_friend_students==T){
		    s_not_i_sim_array[rowSums(A_array_months_data[,,1])==0 | rowSums(A_array_months_data[,,2])==0,]<-NA     
		  }
		}
		rm(alpha)
  }
  
  s_sim_avg<-rowMeans(s_sim_array, na.rm=T)
  y_sim_avg<-rowMeans(y_sim_array, na.rm=T)
  s_not_i_sim_avg<-rowMeans(s_not_i_sim_array, na.rm=T)
  y_obs_avg<-rowMeans(y_obs_array, na.rm=T)

s_summary_table<-rbind(cbind(obs=with(student_data, tapply(s_obs_avg, list(race), mean, na.rm=T)), sim=with(student_data, tapply(s_sim_avg, list(race), mean, na.rm=T))), 
cbind(obs=with(student_data, tapply(s_obs_avg, list(sex), mean, na.rm=T)), sim=with(student_data, tapply(s_sim_avg, list(sex), mean, na.rm=T))), 
cbind(obs=with(student_data, tapply(s_obs_avg, list(hsgpa_level), mean, na.rm=T)), sim=with(student_data, tapply(s_sim_avg, list(hsgpa_level), mean, na.rm=T))), 
total=c(mean(s_obs_avg, na.rm=T), mean(s_sim_avg, na.rm=T)), sd=c(sd(s_obs_avg, na.rm=T), sd(s_sim_avg, na.rm=T)), 
sd_model=c(NA,sd(solution$s_array_months_model)),
cor_s_s_not_i=with(student_data, c(cor(s_obs_avg, s_not_i_obs_avg, use="complete.obs"), cor(s_sim_avg, s_not_i_sim_avg, use="complete.obs"))))

s_not_i_summary_table<-rbind(cbind(obs=with(student_data, tapply(s_not_i_obs_avg, list(race), mean, na.rm=T)), sim=with(student_data, tapply(s_not_i_sim_avg, list(race), mean, na.rm=T))), 
                             cbind(obs=with(student_data, tapply(s_not_i_obs_avg, list(sex), mean, na.rm=T)), sim=with(student_data, tapply(s_not_i_sim_avg, list(sex), mean, na.rm=T))), 
                             cbind(obs=with(student_data, tapply(s_not_i_obs_avg, list(hsgpa_level), mean, na.rm=T)), sim=with(student_data, tapply(s_not_i_sim_avg, list(hsgpa_level), mean, na.rm=T))), total=c(mean(s_not_i_obs_avg, na.rm=T), mean(s_not_i_sim_avg, na.rm=T)), 
                             sd=c(NA,sd(s_not_i_obs_avg, na.rm=T)))


y_summary_table<-rbind(cbind(obs=with(student_data, tapply(y_obs_avg, list(race), mean, na.rm=T)), sim=with(student_data, tapply(y_sim_avg, list(race), mean, na.rm=T))), 
                       cbind(obs=with(student_data, tapply(y_obs_avg, list(sex), mean, na.rm=T)), sim=with(student_data, tapply(y_sim_avg, list(sex), mean, na.rm=T))), 
                       cbind(obs=with(student_data, tapply(y_obs_avg, list(hsgpa_level), mean, na.rm=T)), sim=with(student_data, tapply(y_sim_avg, list(hsgpa_level), mean, na.rm=T))), total=c(mean(y_obs_avg, na.rm=T), mean(y_sim_avg, na.rm=T)), sd=c(sd(y_obs_avg, na.rm=T), sd(y_sim_avg, na.rm=T)), 
                       sd_model=c(NA,sd(solution$y_model_array)))

## fit plots
  melted_s_summary_table<-melt(s_summary_table[1:7,])
  
  ## FIGURE A1, left (study fit plot)
  tikz('./output/plot_study_fit_by_type.tex', standAlone=F)
    ggplot(data=melted_s_summary_table, aes(x=Var1, y=value, fill=Var2)) + geom_bar(stat="identity", position="dodge") + labs(x="", y="own study time (hours/day)", fill="") + coord_cartesian(ylim=c(min(melted_s_summary_table$value)*0.8, max(melted_s_summary_table$value)*1.05)) + theme_bw()+ theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left")
  dev.off()
  
  melted_y_summary_table<-melt(y_summary_table[1:7,])
  
  ## FIGURE A1, right (GPA fit plot)
  tikz('./output/plot_y_fit_by_type.tex', standAlone=F)
    ggplot(data=melted_y_summary_table, aes(x=Var1, y=value, fill=Var2)) + geom_bar(stat="identity", position="dodge") + labs(x="", y="GPA", fill="")  + coord_cartesian(ylim=c(min(melted_y_summary_table$value)*0.8, max(melted_y_summary_table$value)*1.05)) + theme_bw()+ theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left")
  dev.off()  
  
## summarize data, model arrays
source("desc_A_arrays.R")

#### compute equilibrium effects
source("equilibrium_effects.R")

#### comparative statics
source("comparative_statics.R")

## compare residuals from study time and GPA regressions
  s_residual_array<-s_obs_array - s_model_array
  y_residual_array<-y_obs_array - y_model_array
  s_residual_array[s_obs_array==0]<-NA
  y_residual_array[y_obs_array==0]<-NA
  y_residual_array[y_obs_array==4]<-NA
  
## semester level averages
    s_residual_sem_avg_array<-array(NA, dim=c(n_students, 2))
    s_weights_array<-array(NA, dim=c(n_students, 2))
    s_residual_sem_avg_array[,1]<-rowMeans(s_residual_array[,1:4], na.rm=T)
    s_residual_sem_avg_array[,2]<-rowMeans(s_residual_array[,5:8], na.rm=T)
    s_weights_array[,1]<-rowSums(!is.na(s_residual_array[,1:4]), na.rm=T)
    s_weights_array[,2]<-rowSums(!is.na(s_residual_array[,5:8]), na.rm=T)
    s_weights_array<-sqrt(s_weights_array)
    s_avg_residual_vec<-as.numeric(s_residual_sem_avg_array)
    y_residual_vec<-as.numeric(y_residual_array)
    s_weights_vec<-as.numeric(s_weights_array)
        
    ## remove outliers
    extreme_pctile<-0.99
    s_extreme_cutoffs<-quantile(s_avg_residual_vec, c(1-extreme_pctile, extreme_pctile), na.rm=T)
    y_extreme_cutoffs<-quantile(y_residual_vec, c(1-extreme_pctile, extreme_pctile), na.rm=T)
    s_avg_residual_vec[s_avg_residual_vec<s_extreme_cutoffs[1] | s_avg_residual_vec>s_extreme_cutoffs[2]]<-NA
    y_residual_vec[y_residual_vec<s_extreme_cutoffs[1] | y_residual_vec>y_extreme_cutoffs[2]]<-NA
    plot(s_avg_residual_vec, y_residual_vec)
    summary(lm(y_residual_vec ~ s_avg_residual_vec, weights=s_weights_vec))

## Create A array summaries for each survey, to match
A_array_surveys<-array(NA, dim=c(n_students, n_students, n_study_surveys))
n_friends_array_surveys<-array(NA, dim=c(n_students, n_study_surveys))
for (survey in 1:n_study_surveys)
{
  A_array_surveys[,,survey]<-A_array_months[,,month_corresponding_to_survey[survey]]
  n_friends_array_surveys[,survey]<-rowSums(A_array_surveys[,,survey])
}

study_model_df<-data.frame(s_sim_array)
names(study_model_df)<-paste0("s", 1:8)
melted_study_model_df<-melt(data.frame(id=1:n_students, var="study", mode="model", study_model_df), id.vars=c("id", "var", "mode"))

study_data_df<-data.frame(s_obs_array)
names(study_data_df)<-paste0("s", 1:8)
melted_study_data_df<-melt(data.frame(id=1:n_students, var="study", mode="data",study_data_df ), id.vars=c("id", "var", "mode"))

## before combining into one study df, add student characteristics
  student_characteristics<-student_data[,c("race","sex","hsgpa_level")]
  melted_study_df<-rbind(cbind(melted_study_model_df,student_characteristics), cbind(melted_study_data_df,student_characteristics))

y_model_df<-data.frame(y_sim_array)
names(y_model_df)<-c("sem1", "sem2")
melted_y_model_df<-melt(data.frame(id=1:n_students, var="y", mode="model", y_model_df), id.vars=c("id", "var", "mode"))

y_data_df<-data.frame(y_obs_array)
names(y_data_df)<-c("sem1", "sem2")
melted_y_data_df<-melt(data.frame(id=1:n_students, var="y", mode="data", y_data_df), id.vars=c("id", "var", "mode"))

melted_y_df<-rbind(cbind(melted_y_model_df,student_characteristics), cbind(melted_y_data_df,student_characteristics))

## own and friend study time
  study_frstudy_df<-data.frame(s_avg= s_obs_avg, s_not_i_avg=s_not_i_obs_avg, mode="obs")
  study_frstudy_df<-rbind(study_frstudy_df, data.frame(s_avg= s_sim_avg, s_not_i_avg=s_not_i_sim_avg, mode="sim"))

  ## FIGURE A2
  tikz('./output/plot_s_s_not_i_fit.tex', standAlone=F)
    ggplot(study_frstudy_df, aes(x=s_not_i_avg,y=s_avg, color=mode, linetype=mode)) + geom_point(alpha=0.3) + theme_bw() + stat_smooth(se=F) + theme(legend.title=element_blank()) + labs(x="friend study time (hours/day)", y="own study time (hours/day)")+ theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left")
  dev.off()

### summarize stuff
with(melted_study_df, tapply(value, list(variable, mode), mean, na.rm=T))
with(melted_study_df, tapply(value, list(mode), mean, na.rm=T))
with(melted_y_df, tapply(value, list(variable, mode), mean, na.rm=T))
with(melted_y_df, tapply(value, list(mode), mean, na.rm=T))

study_fit_plot_by_survey<-ggplot(data=melted_study_df, aes(x=value, color=mode)) + geom_density() + facet_wrap(~variable)
y_fit_plot_by_sem<-ggplot(data=melted_y_df, aes(x=value, color=mode)) + geom_density() + facet_wrap(~variable)

study_fit_plot<-ggplot(data=melted_study_df, aes(x=value, color=mode)) + geom_density()
y_fit_plot<-ggplot(data=melted_y_df, aes(x=value, color=mode)) + geom_density()

#### SEMESTER LEVEL DATA
  ### Aggregate into semester level data
  s_obs_sem1<-rowMeans(s_obs_array[,which(month_corresponding_to_survey==1)], na.rm=T)
  s_obs_sem2<-rowMeans(s_obs_array[,which(month_corresponding_to_survey==2)], na.rm=T)
  tmp<-s_sim_array[,which(month_corresponding_to_survey==1)]
  tmp[is.na(s_obs_array[,which(month_corresponding_to_survey==1)])]<-NA
  s_sim_sem1<-rowMeans(tmp, na.rm=T)
  tmp<-s_sim_array[,which(month_corresponding_to_survey==2)]
  tmp[is.na(s_obs_array[,which(month_corresponding_to_survey==2)])]<-NA
  s_sim_sem2<-rowMeans(tmp, na.rm=T)
  ## note: gpa is already at semester level
  y_obs_sem1<-y_obs_array[,1]
  y_obs_sem2<-y_obs_array[,2]
  y_sim_sem1<-y_sim_array[,1]
  y_sim_sem2<-y_sim_array[,2]

  sem1_study_df<-data.frame(id=1:n_students, sem=1, var="study", obs=s_obs_sem1, sim=s_sim_sem1, student_characteristics, n_friends=rowSums(A_array_months[,,1]))
  sem2_study_df<-data.frame(id=1:n_students, sem=2, var="study", obs=s_obs_sem2, sim=s_sim_sem2, student_characteristics, n_friends=rowSums(A_array_months[,,2]))
  sem_study_df<-rbind(sem1_study_df, sem2_study_df)

  sem1_y_df<-data.frame(id=1:n_students, sem=1, var="gpa", obs=y_obs_sem1, sim=y_sim_sem1, student_characteristics, n_friends=rowSums(A_array_months[,,1]))
  sem2_y_df<-data.frame(id=1:n_students, sem=2, var="gpa", obs=y_obs_sem2, sim=y_sim_sem2, student_characteristics, n_friends=rowSums(A_array_months[,,2]))
  sem_y_df<-rbind(sem1_y_df, sem2_y_df)

  melted_sem_study_df<-melt(sem_study_df, measure.vars = c("obs", "sim"))
  melted_sem_y_df<-melt(sem_y_df, measure.vars = c("obs", "sim"))

  sem_study_fit_plot<-ggplot(data=melted_sem_study_df, aes(x=value, color=variable)) + stat_ecdf()
  sem_y_fit_plot<-ggplot(data=melted_sem_y_df, aes(x=value, color=variable)) + stat_ecdf()

## tables showing total study, gpa
total_study_table<-rbind( "study total"=with(melted_sem_study_df, tapply(value, variable, mean, na.rm=T)), "study sd"=with(melted_sem_study_df, tapply(value, variable, sd, na.rm=T)))

total_y_table<-rbind("gpa total"=with(melted_sem_y_df, tapply(value, variable, mean, na.rm=T)), "gpa sd"=with(melted_sem_y_df, tapply(value, variable, sd, na.rm=T)))

total_table<-rbind(total_study_table, total_y_table)

print(total_table)
write.csv(total_table,file="./output/total_table_baseline.csv")

## model sem-avg table
s_model_sem1<-rowMeans(s_model_array[,1:4], na.rm=T)
s_model_sem2<-rowMeans(s_model_array[,5:8], na.rm=T)
y_model_sem1<-y_model_array[,1]
y_model_sem2<-y_model_array[,2]

s_y_model_sem_avg_mean_sd<- cbind(cbind(s_mean=mean(c(s_model_sem1, s_model_sem2), na.rm=T), s_sd=sd(c(s_model_sem1, s_model_sem2), na.rm=T)), cbind(y_mean=mean(c(y_model_sem1, y_model_sem2), na.rm=T), y_sd=sd(c(y_model_sem1, y_model_sem2), na.rm=T)))
                         
## study time best response
s_pred_points<-seq(1,5,.01)
s_obs_loess<-loess(s_obs_avg ~ s_not_i_obs_avg)
s_obs_loess_pred<-predict(s_obs_loess, data.frame(s_not_i_obs_avg=s_pred_points))
s_sim_loess<-loess(s_sim_avg ~ s_not_i_sim_avg)
s_sim_loess_pred<-predict(s_sim_loess, data.frame(s_not_i_sim_avg=s_pred_points))

plot(s_pred_points,s_obs_loess_pred, t="l", ylim=c(2.5,4),main="BR plots, data and sim")
points(s_pred_points,s_sim_loess_pred, t="l", col="blue")

## test scores on own study time
y_s_obs_loess<-loess(y_obs_avg ~ s_obs_avg)
y_s_obs_loess_pred<-predict(y_s_obs_loess, data.frame(s_obs_avg=s_pred_points))
y_s_sim_loess<-loess(y_sim_avg ~ s_sim_avg)
y_s_sim_loess_pred<-predict(y_s_sim_loess, data.frame(s_sim_avg=s_pred_points))

plot(s_pred_points,y_s_obs_loess_pred, t="l",main="GPA on s, data and sim")
points(s_pred_points,y_s_sim_loess_pred, t="l", col="blue")

## test scores on friend study time
y_s_not_i_obs_loess<-loess(y_obs_avg ~ s_not_i_obs_avg)
y_s_not_i_obs_loess_pred<-predict(y_s_not_i_obs_loess, data.frame(s_not_i_obs_avg=s_pred_points))
y_s_not_i_sim_loess<-loess(y_sim_avg ~ s_not_i_sim_avg)
y_s_not_i_sim_loess_pred<-predict(y_s_not_i_sim_loess, data.frame(s_not_i_sim_avg=s_pred_points))

plot(s_pred_points,y_s_not_i_obs_loess_pred, t="l",main="GPA on friend s, data and sim")
points(s_pred_points,y_s_not_i_sim_loess_pred, t="l", col="blue")

## compare own and friend study time regressions, data vs model
data_study_reg<-lm(s_obs_avg ~ hsgpa + male + black + studyhs + s_not_i_obs_avg, data=student_data)
## for sim reg use only students who have observed 
sim_study_reg<-lm(s_sim_avg ~ hsgpa + male + black + studyhs +  s_not_i_sim_avg, data=student_data)


study_reg_table<-cbind(obs=coef(data_study_reg),sim=coef(sim_study_reg))
print(study_reg_table)
stargazer(study_reg_table, out="./output/study_reg_table.tex")

## compute in-sample r-squared for study time and GPA
## compare for students retained in asymmetric
load("asym_pids")  
    
  sem1_s_obs_avg<-rowMeans(s_obs_array[,1:4], na.rm=T)
  sem2_s_obs_avg<-rowMeans(s_obs_array[,5:8], na.rm=T)
  sem1_s_model_avg<-rowMeans(s_model_array[,1:4], na.rm=T)
  sem2_s_model_avg<-rowMeans(s_model_array[,5:8], na.rm=T)
  
  results_for_rsq<-data.frame(pid=student_data$pid, sem1_s_obs_avg, sem2_s_obs_avg, sem1_s_model_avg, sem2_s_model_avg, sem1_y_model=y_model_array[,1], sem2_y_model=y_model_array[,2], sem1_y_obs=student_data$gpa1, sem2_y_obs=student_data$gpa2)

  results_for_rsq<-subset(results_for_rsq, pid %in% asym_pids)

  sem_s_obs_avg_stacked<-with(results_for_rsq, c(sem1_s_obs_avg, sem2_s_obs_avg))
  sem_s_model_avg_stacked<-with(results_for_rsq, c(sem1_s_model_avg, sem2_s_model_avg))
  
  study_rsq_reg<-lm(sem_s_obs_avg_stacked ~ sem_s_model_avg_stacked)
  
  sem_y_obs_stacked<-with(results_for_rsq, c(sem1_y_obs, sem2_y_obs))
  sem_y_model_stacked<-with(results_for_rsq, c(sem1_y_model, sem2_y_model))
  
  gpa_rsq_reg<-lm(sem_y_obs_stacked ~ sem_y_model_stacked)
  
  stargazer(study_rsq_reg, gpa_rsq_reg, out="./output/rsq_regs.tex")

## make data frame collecting all results to compare with other simulated scenarios

results_df<-data.frame(student_data,mu_s=solution$mu_study_model, mu_y=solution$mu_y_model, sem1_s_model=sem1_s_model_avg, sem2_s_model=sem2_s_model_avg, sem1_s_not_i_model=s_not_i_model_array[,1], sem2_s_not_i_model=s_not_i_model_array[,5], sem1_n_friends=rowSums(A_array_months[,,1]), sem2_n_friends=rowSums(A_array_months[,,2]), sem1_y_model=y_model_array[,1], sem2_y_model=y_model_array[,2], sem1_y_growth=y_growth_array[,1], sem2_y_growth=y_growth_array[,2])

save(results_df, file="./output/results_df_baseline")

